from transformers import BertModel, BertTokenizer
import torch
import torch.nn as nn

# Case-sensitive WordPiece tokenizer shared by the helpers below.
# NOTE: from_pretrained downloads/caches the vocab on first use (network I/O).
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')


def sentence2tensor(sentence: str) -> torch.Tensor:
    """Encode *sentence* into a batch-of-one tensor of BERT token ids.

    Args:
        sentence: Raw input text.

    Returns:
        A ``(1, seq_len)`` ``torch.LongTensor`` of token ids, including the
        ``[CLS]``/``[SEP]`` special tokens that ``BertModel`` expects.
        (The previous version omitted the special tokens and left a debug
        ``print`` in place.)
    """
    # encode() = tokenize + convert_tokens_to_ids + special-token insertion.
    token_ids = tokenizer.encode(sentence, add_special_tokens=True)
    # unsqueeze(0) adds the batch dimension required by nn.Module forward.
    return torch.tensor(token_ids).unsqueeze(0)


class Model(nn.Module):
    """Thin wrapper exposing a pretrained BERT encoder as an embedding module."""

    def __init__(self):
        super().__init__()
        # 'bert-base-cased' is case-sensitive; the '-uncased' variants
        # lowercase all input. Passing output_hidden_states=True to
        # from_pretrained would additionally return every encoder layer's
        # output (initial embeddings + all 12 transformer blocks).
        self.embedding = BertModel.from_pretrained('bert-base-cased')

    def forward(self, X):
        # X: (batch, seq_len) tensor of token ids.
        # Returns the BertModel output (last_hidden_state, pooler_output, ...).
        # Debug print of the full output removed.
        return self.embedding(X)


# Demo: encode one sentence and push it through BERT.
# NOTE(review): runs on import, not behind `if __name__ == "__main__":` —
# consider guarding if this module is ever imported elsewhere.
sentence = "what are you doing now ?"
model = Model()
res = model(sentence2tensor(sentence))
